/*

Copyright (C) 1998,1999,2000,2001  Franz Josef Och (RWTH Aachen - Lehrstuhl fuer Informatik VI)

This file is part of GIZA++ ( extension of GIZA ).

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
USA.

*/
#include "hmm.h"
#include "Globals.h"
#include "utility.h"
#include "HMMTables.h"
#include "ForwardBackward.h"
#include "Parameter.h"
#include <iostream>
#include "syncObj.h"
using namespace std;
// CLASSIFY maps an extended HMM state index i in [0,2l) onto
//  (a) a flag 'empty' that is true when i refers to the empty-word copy of a
//      position (states l..2l-1), and
//  (b) the underlying real source position 'ianf' in [0,l).
// Both macros assume a variable 'l' (source sentence length) is in scope.
#define CLASSIFY(i,empty,ianf) bool empty=(i>=l); unsigned int ianf=(i%l);
// CLASSIFY2: same mapping when only the real position is needed.
#define CLASSIFY2(i,ianf) unsigned int ianf=(i%l);


// Training switches.
short PredictionInAlignments=0;   // feeds DependencyOfJ/DependencyOfPrevAJ below
short UniformEntryExit=3;         // &1: alpha init from probs; &2: beta init from probs (see makeHMMNetwork)
short HMMTrainingSpecialFlags=0;  // &1: use HMMViterbi (posterior) instead of HMMRealViterbi

// How often (in iterations) the HMM model files are dumped (0 = never).
GLOBAL_PARAMETER2(int,ModelH_Dump_Freq,
                  "HMM DUMP FREQUENCY","th",
                  "dump frequency of HMM",
                  PARLEV_OUTPUT,0);

// Bit mask selecting which context the HMM alignment model conditions on.
GLOBAL_PARAMETER(short,CompareAlDeps,"emAlignmentDependencies",
                 "lextrain: dependencies in the HMM alignment model. "
                 " &1: sentence length; &2: previous class; &4: previous position; "
                 " &8: French position; &16: French class"
                 ,PARLEV_MODELS,2);

// Probability mass reserved for the empty word in the HMM tables.
GLOBAL_PARAMETER(double,GLOBALProbabilityForEmpty,
                 "emProbForEmpty","f-b-trn: probability for empty word",
                 PARLEV_MODELS,0.4);

// Smoothing behavior flags; &2 enables smooth_standard with -emAlSmooth.
GLOBAL_PARAMETER(short,SmoothHMM,"emSmoothHMM",
                 "f-b-trn: smooth HMM model &1: modified counts; &2:perform smoothing with -emAlSmooth",
                 PARLEV_SPECIAL,2);

// Interpolation weight used by smooth_standard when SmoothHMM&2 is set.
GLOBAL_PARAMETER(double,HMMAlignmentModelSmoothFactor,"emAlSmooth",
                 "f-b-trn: smoothing factor for HMM alignment model (can be ignored by -emSmoothHMM)",
                 PARLEV_SMOOTH,0.2);


/*template<class T>
void smooth_standard(T*a,T*b,double p)
{
  int n=b-a;
  if( n==0 )
    return;
  double pp=p/n;
  for(T*i=a;i!=b;++i)
    *i = (1.0-p)*(*i)+pp;
}*/


// Construct the HMM model on top of an already-trained IBM model 2.
// Both the jump-count table ('counts') and the jump-probability table
// ('probs') are initialized with the global empty-word probability and the
// source (e) / target (f) word classes.
hmm::hmm(model2&m2,WordClasses &e, WordClasses& f)
  :   model2(m2), ewordclasses(e), fwordclasses(f), counts(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses),
      probs(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses)
{
}


// Intentionally a no-op: the HMM count tables are re-created at the start of
// each training iteration, so no uniform initialization step is required.
void hmm::initialize_table_uniformly(sentenceHandler&)
{
}

class hmm;  // full definition provided by hmm.h

// Per-worker descriptor for one multi-threaded EM iteration over the corpus.
// One instance exists per CPU; hmm_exe_emloop() is the pthread entry point
// that consumes it.
struct hmm_em_loop_t {
  hmm *m;                 // model the worker operates on (not owned)
  int done;               // set to -1 by the worker once em_thread returns
  int valid;              // pthread_create() return code (0 on success)
  std::string alignfile;  // per-thread alignment dump file (".partNNN" suffix)
  int it;                 // current EM iteration number
  bool dump_files;        // whether this iteration dumps alignment files
  bool resume;            // true when resuming training (skips first-iteration init)
  pthread_t thread;       // worker thread handle (filled by pthread_create)
  // Initialize every scalar member; the original constructor left 'it',
  // 'dump_files' and 'resume' indeterminate until the spawning loop
  // happened to assign them.
  hmm_em_loop_t()
    : m(0), done(0), valid(0), it(0), dump_files(false), resume(false) {}
};

void* hmm_exe_emloop(void *arg)
{
  hmm_em_loop_t* em =(hmm_em_loop_t *) arg;
  em->m->em_thread(em->it,em->alignfile,em->dump_files,em->resume);
  em->done = -1;
  return arg;
}

// Thread-side wrapper around em_loop: runs one EM pass over the training
// corpus for iteration 'it', writing alignments to 'alignfile'.
void hmm::em_thread(int it,string alignfile,bool dump_files,bool resume)
{
  const bool firstIteration = (it == 1) && !resume;
  em_loop(perp, sHandler1, dump_files, alignfile.c_str(), trainViterbiPerp,
          false, firstIteration, it);
}
extern short NCPUS;
int hmm::em_with_tricks(int noIterations,bool dumpCount,
                        const char* dumpCountName, bool useString ,bool resume)
{
  double minErrors=1.0;
  int minIter=0;
  string modelName="Hmm",shortModelName="hmm";
  int dumpFreq=ModelH_Dump_Freq;
  time_t it_st, st, it_fn, fn;
  string tfile, afile,afileh, alignfile, test_alignfile;
  bool dump_files = false ;
  ofstream of2 ;
  st = time(NULL) ;
  sHandler1.rewind();
  cout << "\n==========================================================\n";
  cout << modelName << " Training Started at: " << my_ctime(&st);
  vector<hmm_em_loop_t> th;
  th.resize(NCPUS);
  for(int it=1; it <= noIterations ; it++) {
    it_st = time(NULL) ;
    cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
    dump_files = (dumpFreq != 0) && ((it % dumpFreq) == 0 || it == noIterations) && !NODUMPS;

    cerr << "Dump files " << dump_files << " it " << it << " noIterations " << noIterations << " dumpFreq " << dumpFreq <<endl;
    //dump_files = true;
    const string number = represent_number(it);
    tfile = Prefix + ".t" + shortModelName + "." + number ;
    afile = Prefix + ".a" + shortModelName + "." + number ;
    // acfile = Prefix + ".ac" + shortModelName + "." + number ;
    afileh = Prefix + ".h" + shortModelName + "." + number ;
    alignfile = Prefix + ".A" + shortModelName + "." + number ;
    test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
    counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
    aCountTable.clear();
    initAL();
    sHandler1.rewind();
    int k;
    for (k=1 ; k< NCPUS ; k++) {
      th[k].m = this;
      th[k].done = 0;
      th[k].valid = 0;
      th[k].it = it;
      th[k].resume = resume;
      th[k].alignfile = alignfile + ".part" + represent_number(k, 3);
      th[k].dump_files = dump_files;
      th[k].valid = pthread_create(&(th[k].thread),NULL,hmm_exe_emloop,&(th[k]));
      if(th[k].valid) {
        cerr << "Error starting thread " << k << endl;
      }
    }
    alignfile += ".part" + represent_number(0, 3);
    em_loop(perp, sHandler1,  dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1 && (!resume),it);
    for (k=1; k<NCPUS; k++) {
      pthread_join((th[k].thread),NULL);
      cerr << "Thread " << k << "done" << endl;
    }
    perp.record("HMM");
    trainViterbiPerp.record("HMM");
    errorReportAL(cout,"HMM");

    sHandler1.rewind();
    if( errorsAL()<minErrors ) {
      minErrors=errorsAL();
      minIter=it;
    }
    if (testPerp && testHandler) {
      testHandler->rewind();
      em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp,  true,it==1 && (!resume),it);
      testHandler->rewind();
    }
    if (dump_files&&OutputInAachenFormat==1)
      tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);

    if(dumpCount && it == noIterations) {
      string realTableName = dumpCountName;
      realTableName += ".t.count";
      tTable.printCountTable(realTableName.c_str(),Elist.getVocabList(),Flist.getVocabList(),useString);
      string realATableName = dumpCountName;
      realATableName += ".a.count";
      aCountTable.printRealTable(realATableName.c_str());
      string realHTableName = dumpCountName;
      realHTableName += ".h.count";
      string fnamealpha = realHTableName;
      string fnamebeta = realHTableName;
      fnamealpha += ".alpha";
      fnamebeta += ".beta";
      counts.writeJumps(realHTableName.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());

    }
    tTable.normalizeTable(Elist, Flist);
    aCountTable.normalize(aTable);
    probs=counts;
    cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
         << " PERPLEXITY " << perp.perplexity() << '\n';
    if (testPerp && testHandler)
      cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
           << " PERPLEXITY " << (*testPerp).perplexity()
           << '\n';
    cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
         << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
    if (testPerp && testHandler)
      cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
           << " PERPLEXITY " << testViterbiPerp->perplexity()
           << '\n';
    if (dump_files) {
      if( OutputInAachenFormat==0)
        tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
      // ofstream afilestream(afileh.c_str());
      string fnamealpha = afileh;
      string fnamebeta = afileh;
      fnamealpha += ".alpha";
      fnamebeta += ".beta";
      probs.writeJumps(afileh.c_str(),NULL,fnamealpha.c_str(),fnamebeta.c_str());
//            aCountTable.printTable(acfile.c_str());
      aTable.printTable(afile.c_str());
    }
    it_fn = time(NULL) ;
    cout << "\n" << modelName << " Iteration: " << it<< " took: " <<
         difftime(it_fn, it_st) << " seconds\n";
  } // end of iterations
  fn = time(NULL) ;
  cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
  //cout << "tTable contains " << tTable.getHash().bucket_count()
  //     << " buckets and  " << tTable.getHash().size() << " entries." ;
  cout << "==========================================================\n";
  return minIter;
}

/*template<class T>
T normalize_if_possible_with_increment(T*a,T*b,int increment)
{
  T sum=0;
  for(T*i=a;i!=b;i+=increment)
    sum+=*i;
  if( sum )
    for(T*i=a;i!=b;i+=increment)
      *i/=sum;
  else
    {
      T factor=increment/(b-a);
      for(T*i=a;i!=b;i+=increment)
	*i=factor;
    }
  return sum;
}*/

// Load a jump-probability table from file 'aname'.
// NOT IMPLEMENTED: the abort() makes everything after it unreachable; the
// two trailing statements only sketch the intended behavior.
void hmm::load_table(const char* aname)
{
  cout << "Hmm: loading a table not implemented.\n";
  abort();
  // Unreachable sketch of the intended implementation:
  ifstream anamefile(aname);
  probs.readJumps(anamefile);
}

// Build the HMM network (emission and transition tables) for one sentence
// pair.  es/fs are 1-based sentence vectors (index 0 holds the empty word);
// l = |es|-1, m = |fs|-1.  The network has I = 2*l states: states 0..l-1
// are the real source positions, states l..2l-1 their empty-word copies.
// When 'doInit' is true, transitions/initial weights are set up so the
// first HMM iteration behaves like IBM-1 (see the comments below).
// The caller owns (and must delete) the returned network.
HMMNetwork *hmm::makeHMMNetwork(const Vector<WordIndex>& es,const Vector<WordIndex>&fs,bool doInit)const
{
  unsigned int i,j;
  unsigned int l = es.size() - 1;
  unsigned int m = fs.size() - 1;
  unsigned int I=2*l,J=m;
  int IJ=I*J;
  // Which contexts the jump model depends on (controlled by CompareAlDeps
  // bits and PredictionInAlignments).
  bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
  bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
  HMMNetwork *net = new HMMNetwork(I,J);
  fill(net->alphainit.begin(),net->alphainit.end(),0.0);
  fill(net->betainit.begin(),net->betainit.end(),0.0);
  // Emission probabilities: t(f_j|e_i) for real states, t(f_j|NULL) for all
  // empty-word states.  Each target position's column is normalized; the
  // normalization mass (floored at 1e-12) is collected in finalMultiply.
  for(j=1; j<=m; j++) {
    for(i=1; i<=l; i++) {
      // cout << es[i] <<" " << fs[j] <<" " << tTable.getProb(es[i], fs[j]) << endl;
      net->n(i-1,j-1)=tTable.getProb(es[i], fs[j]) ;
    }
    double emptyContribution=0;
    emptyContribution=tTable.getProb(es[0],fs[j]) ;
    for(i=1; i<=l; i++)
      net->n(i+l-1,j-1)=emptyContribution;
    net->finalMultiply*=max(normalize_if_possible_with_increment(&net->n(0,j-1),&net->n(0,j-1)+IJ,J),double(1e-12));
  }
  // One transition matrix per target position if the jump model depends on
  // j; otherwise a single shared matrix (none at all when J<=1).
  if( DependencyOfJ )
    net->e.resize(m-1);
  else
    net->e.resize(J>1);
  for(j=0; j<net->e.size(); j++) {
    int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(j)+1)]);
    net->e[j].resize(I,I,0);
    for(unsigned int i1=0; i1<I; ++i1) {
      // Jump probabilities from the real position behind state i1 to every
      // real target-of-jump position i2, then normalized and optionally
      // smoothed (SmoothHMM&2).
      Array<double> al(l);
      CLASSIFY2(i1,i1real);
      for(unsigned int i2=0; i2<l; i2++)
        al[i2]=probs.getAlProb(i1real,i2,l,m,ewordclasses.getClass(es[1+i1real]),frenchClass
                               ,j+1);
      normalize_if_possible(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()));
      if( SmoothHMM&2 )
        smooth_standard(const_cast<double*>(&al[0]),const_cast<double*>((&al[0])+al.size()),HMMAlignmentModelSmoothFactor);
      for(unsigned int i2=0; i2<I; i2++) {
        CLASSIFY(i2,empty_i2,i2real);
        net->e[j](i1,i2)	    = al[i2real];

        // A jump into an empty-word state is only allowed into the copy of
        // the current position (i1real==i2real); its probability is the
        // empty-word probability (or al[0] during IBM-1-like init).
        if( empty_i2 )
          if(i1real!=i2real) {
            net->e[j](i1,i2)=0;
          } else {
            net->e[j](i1,i2)=doInit?al[0]:(probs.getProbabilityForEmpty()); // make first HMM iteration like IBM-1
          }
      }
      normalize_if_possible(&net->e[j](i1,0),&net->e[j](i1,0)+I);
    }
  }
  if( doInit ) {
    // IBM-1-like start; note betainit is immediately overwritten to 1.0
    // for every state on the following line.
    for(unsigned int i=0; i<I; ++i) {
      net->alphainit[i]=net->betainit[i]=(i<I/2)?1:(2.0/I);
      net->betainit[i]=1.0;
    }
  } else {
    if( DependencyOfPrevAJ==0 ) {
      // Initial distribution taken from the jump model with "previous
      // position" -1 (sentence start).
      for(i=0; i<I; i++) {
        CLASSIFY2(i,ireal);
        net->alphainit[i]=probs.getAlProb(-1,ireal,l,m,0,fwordclasses.getClass(fs[1+0]),0);
      }
    } else {
      // Entry/exit distributions from the trained alpha/beta init tables,
      // selected by the UniformEntryExit bit mask.
      if( UniformEntryExit&2 )probs.getBetaInit(I,net->betainit);
      if( UniformEntryExit&1 )probs.getAlphaInit(I,net->alphainit);
    }
  }
  massert( net->alphainit.size()==I );
  massert( net->betainit.size()==I );
  normalize_if_possible(const_cast<double*>(&(net->alphainit[0])),const_cast<double*>(&(net->alphainit[0])+net->alphainit.size()));
  normalize_if_possible(const_cast<double*>(&(net->betainit[0])),const_cast<double*>(&(net->betainit[0])+net->betainit.size()));
  // Scale the (normalized) beta init by 2*l — presumably to keep the
  // backward pass on the same scale as the state count; TODO confirm.
  transform(net->betainit.begin(),net->betainit.end(),net->betainit.begin(),bind1st(multiplies<double>(),2*l));
  return net;
}
extern float MINCOUNTINCREASE;

// One EM pass (E-step) over the sentences delivered by sHandler1.
// For each sentence pair: build the HMM network, run forward-backward
// training, accumulate lexical (tTable), distortion (aCountTable), jump
// (counts) and alpha/beta-initialization statistics, then compute the
// Viterbi alignment for perplexity reporting and optional dumping.
//   dump_alignment - write alignments of each pair to 'alignfile'
//   test           - when true, skip all count accumulation
//   doInit         - make this pass behave like IBM-1 (first iteration)
//   (trailing unnamed int: iteration number, unused in the body)
// Thread-safety: alpha/beta init updates are guarded by the per-entry
// mutexes from counts.doGetAlphaInit/doGetBetaInit; tTable/aCountTable/
// counts updates go through their own add/inc interfaces.
void hmm::em_loop(Perplexity& perp, sentenceHandler& sHandler1,
                  bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
                  bool test,bool doInit,int
                 )
{
  WordIndex i, j, l, m ;
  double cross_entropy;
  int pair_no=0 ;
  perp.clear();
  viterbi_perp.clear();
  ofstream of2;
  // for each sentence pair in the corpus
  if (dump_alignment||FEWDUMPS )
    of2.open(alignfile);
  sentPair sent ;

  while(sHandler1.getNextSentence(sent)) {
    const Vector<WordIndex>& es = sent.get_eSent();// 1-based; es[0] = empty word
    const Vector<WordIndex>& fs = sent.get_fSent();
    const float so  = sent.getCount();             // sentence-pair weight
    l = es.size() - 1;
    m = fs.size() - 1;
    cross_entropy = log(1.0);
    Vector<WordIndex> viterbi_alignment(fs.size());// alignment a_j per target pos

    unsigned int I=2*l,J=m;
    bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
    bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
    HMMNetwork *net=makeHMMNetwork(es,fs,doInit);

    // gamma: one value per (target position, state), laid out j-major;
    // epsilon: transition statistics, one I x I matrix (per j if the jump
    // model depends on j).
    Array<double> gamma;
    Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
    double trainProb;
    trainProb=ForwardBackwardTraining(*net,gamma,epsilon);

    if( !test ) {
      // ---- E-step: accumulate counts from the forward-backward output ----

#ifdef WIN32
      double *gp=const_cast<double*>(&(gamma[0]));
#else
      double *gp=conv<double>(gamma.begin());
#endif

      // Lexical and distortion counts from the state occupancies; states
      // i1 >= l are the empty-word copies and map to source position 0.
      for(unsigned int i2=0; i2<J; i2++)
        for(unsigned int i1=0; i1<I; ++i1,++gp) {
          if( *gp>MINCOUNTINCREASE ) {
            COUNT add= *gp*so;
            if( i1>=l ) {
              tTable.incCount(es[0],fs[1+i2],add);
              aCountTable.addValue(0,i2+1,l,m,add);
              //aCountTable.getRef(0,i2+1,l,m)+=add;
            } else	{
              tTable.incCount(es[1+i1],fs[1+i2],add);
              aCountTable.addValue(1+i1,1+i2,l,m,add);
              //aCountTable.getRef(1+i1,1+i2,l,m)+=add;
            }
          }
        }
      // p0c / np0c: accumulated mass of transitions into empty / non-empty
      // states (only reported in the Verbose output below).
      double p0c=0.0,np0c=0.0;
      for(unsigned int jj=0; jj<epsilon.size(); jj++) {
        int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
        double *ep=epsilon[jj].begin();
        if( ep ) {
          //for(i=0;i<I;i++)
          //  normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
          //		for(i=0;i<I*I;++i)
          //  ep[i] *= I;
          //if( DependencyOfJ )
          //  if( J-1 )
          //    for(i=0;i<I*I;++i)
          //      ep[i] /= (J-1);
          double mult=1.0;
          mult*=l;
          //if( DependencyOfJ && J-1)
          //  mult/=(J-1);
          // Jump counts: for each transition (i_bef -> i), add the scaled
          // epsilon mass unless the target is an empty-word state.
          for(i=0; i<I; i++) {
            for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
              CLASSIFY(i,i_empty,ireal);
              CLASSIFY2(i_bef,i_befreal);
              if( i_empty )
                p0c+=*ep * mult;
              else {
                int v = ewordclasses.getClass(es[1+i_befreal]);
                //cerr << v <<" " << es.size() << " "<< i_befreal << endl;
                counts.addAlCount(i_befreal,ireal,l,m,v,
                                  frenchClass ,jj+1,*ep * mult,0.0);
                np0c+=*ep * mult;
              }
              massert( &epsilon[jj](i,i_bef)== ep);
            }
          }
        }
      }

      // Alpha init counts come from the first target position's gamma
      // values (gp1), beta init counts from the last (gp2).
#ifdef WIN32
      double *gp1=const_cast<double *>(&(gamma[0])),*gp2=const_cast<double*>(&(gamma[0])+gamma.size())-I;
#else
      double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
#endif
      hmmentry_type&ai0=counts.doGetAlphaInit(I);
      Array<double>&ai = ai0.first;
      hmmentry_type&bi0=counts.doGetBetaInit(I);
      Array<double>&bi = bi0.first;
      int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
      // Per-entry mutex guards the shared init vectors (WIN32 stores the
      // mutex behind a pointer, hence the -> vs . difference).
#ifdef WIN32
      ai0.second->lock();
#else
      ai0.second.lock();
#endif
      for(i=0; i<I; i++,gp1++) {
        CLASSIFY(i,i_empty,ireal);
        ai[i]+= *gp1;
        //bi[i]+= *gp2;
        if( DependencyOfPrevAJ==0 ) {
          // Sentence-start jump counts (previous position = -1).
          if( i_empty )
            p0c+=*gp1;
          else {
            counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
            np0c+=*gp1;
          }
        }
      }
#ifdef WIN32
      ai0.second->unlock();
      bi0.second->lock();
#else
      ai0.second.unlock();
      bi0.second.lock();
#endif

      for(i=0; i<I; i++,gp2++) {
        CLASSIFY(i,i_empty,ireal);
        bi[i]+= *gp2;
      }
#ifdef WIN32
      bi0.second->unlock();
#else
      bi0.second.unlock();
#endif

      if( Verbose )
        cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
    }

    // Perplexity bookkeeping and Viterbi alignment (posterior-based when
    // HMMTrainingSpecialFlags&1, true Viterbi otherwise).
    cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
    Array<int>vit;
    double viterbi_score=1.0;
    if( (HMMTrainingSpecialFlags&1) )
      HMMViterbi(*net,gamma,vit);
    else
      viterbi_score=HMMRealViterbi(*net,vit);
    // Map state indices back to source positions; empty-word states (> l)
    // become alignment 0.
    for(j=1; j<=m; j++) {
      viterbi_alignment[j]=vit[j-1]+1;
      if( viterbi_alignment[j]>l)
        viterbi_alignment[j]=0;
    }
    sHandler1.setProbOfSentence(sent,cross_entropy);
    perp.addFactor(cross_entropy, so, l, m,1);
    viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
    if( Verbose )
      cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;

    delete net;
    net=0;
    if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
      printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
    addAL(viterbi_alignment,sent.getSentenceNo(),l);

    pair_no++;
  } /* of while */

}

void hmm::clearCountTable()
{
  counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
}

#if 0
// DISABLED (compiled out by #if 0): experimental single-threaded EM pass
// that accumulates lexical counts into a CTTableDiff instead of tTable,
// instrumented with heavy "nnn==7779" debug tracing.  Kept for reference;
// see em_loop() above for the active implementation.
CTTableDiff<COUNT,PROB>* hmm::em_loop_1(Perplexity& perp, sentenceHandler& sHandler1,
                                        bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
                                        bool test,bool doInit,int
                                       )
{
  CTTableDiff<COUNT,PROB> *diff = new CTTableDiff<COUNT,PROB>();
  //diff->incCount(1,1,0);
  WordIndex i, j, l, m ;
  double cross_entropy;
  int pair_no=0 ;
  perp.clear();
  viterbi_perp.clear();
  ofstream of2;
  // for each sentence pair in the corpus
  if (dump_alignment||FEWDUMPS )
    of2.open(alignfile);
  sentPair sent ;
  sHandler1.rewind();
  int nnn = 0;  // sentence counter, used only for the debug traces below
  while(sHandler1.getNextSentence(sent)) {
    nnn ++;
    cout << nnn << endl;
    cout << 1 << endl;
    const Vector<WordIndex>& es = sent.get_eSent();
    const Vector<WordIndex>& fs = sent.get_fSent();
    const float so  = sent.getCount();
    l = es.size() - 1;
    m = fs.size() - 1;
    cross_entropy = log(1.0);
    Vector<WordIndex> viterbi_alignment(fs.size());

    unsigned int I=2*l,J=m;
    bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
    bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);
    cout << 2 << endl;
    HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
    Array<double> gamma;
    Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
    double trainProb;
    cout << 2.5 << endl;
    trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
    cout << 3 << endl;
    if( !test ) {
      double *gp=conv<double>(gamma.begin());
      cout << 4 << endl;
      // Lexical counts go into 'diff' (not tTable); distortion counts are
      // still applied directly to aCountTable.
      for(unsigned int i2=0; i2<J; i2++)for(unsigned int i1=0; i1<I; ++i1,++gp) {
          if( *gp>MINCOUNTINCREASE ) {
            COUNT add= *gp*so;
            if( i1>=l ) {
              diff->incCount(es[0],fs[1+i2],add);
              //tTable.incCount(es[0],fs[1+i2],add);
              aCountTable.getRef(0,i2+1,l,m)+=add;
            } else	{
              diff->incCount(es[1+i1],fs[1+i2],add);
              //tTable.incCount(es[1+i1],fs[1+i2],add);
              aCountTable.getRef(1+i1,1+i2,l,m)+=add;
            }
          }
        }
      cout << 5 << endl;
      double p0c=0.0,np0c=0.0;
      for(unsigned int jj=0; jj<epsilon.size(); jj++) {
        if (nnn==7779) cout << 1 << endl;
        int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
        if (nnn==7779) cout << 2 << endl;
        double *ep=epsilon[jj].begin();
        if (nnn==7779) cout << 3 << endl;
        if( ep ) {
          //for(i=0;i<I;i++)
          //  normalize_if_possible_with_increment(ep+i,ep+i+I*I,I);
          //		for(i=0;i<I*I;++i)
          //  ep[i] *= I;
          //if( DependencyOfJ )
          //  if( J-1 )
          //    for(i=0;i<I*I;++i)
          //      ep[i] /= (J-1);
          double mult=1.0;
          mult*=l;
          //if( DependencyOfJ && J-1)
          //  mult/=(J-1);
          if (nnn==7779) cout << 4 << ":" << I  << endl;
          for(i=0; i<I; i++) {
            if (nnn==7779) cout << "i:" << i << endl;
            for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
              if (nnn==7779) cout << "   CL 1"   << endl;
              CLASSIFY(i,i_empty,ireal);
              if (nnn==7779) cout << "   CL 2 : "   << i_bef << " " <<  (size_t)ep << endl;
              CLASSIFY2(i_bef,i_befreal);
              // Bounds guard added during debugging; skips out-of-range
              // epsilon cells.
              if((i+1)*(i_bef+1)>epsilon[jj].getLen1()*epsilon[jj].getLen2()) {
                continue;
              }
              if( i_empty )
                p0c+=epsilon[jj](i,i_bef)*mult;// p0c+=*ep * mult;
              else {
                if (nnn==7779) cout << "ELSE" << endl;
                if (nnn==7779) {
                  cout << i_befreal<<" " <<ireal<<" " << l<<" " << m<<" "<< jj<<" "<<epsilon.size()<< " " << epsilon[jj].getLen1() <<" " << epsilon[jj].getLen2()<< endl;
                  np0c+=epsilon[jj](i,i_bef)*mult;
                  cout <<"..."<<endl;
                  cout <<"......"<<ewordclasses.getClass(es[1+i_befreal]) << endl;
                  cout <<"......"<<endl;
                  counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
                                    frenchClass ,jj+1,0,0.0);
                  np0c+=epsilon[jj](i,i_bef)*mult;
                } else {
                  counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
                                    frenchClass ,jj+1,epsilon[jj](i,i_bef)*mult,0.0);
                  np0c+=epsilon[jj](i,i_bef)*mult;
                }
              }
              if (nnn==7779) cout << "FI" << endl;
              massert( &epsilon[jj](i,i_bef)== ep);
            }
          }
          if (nnn==7779) cout << 5 << endl;
        }
      }
      //    cout << 6 << endl;
      double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
      Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
      Array<double>&bi=counts.doGetBetaInit(I);
      int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
      for(i=0; i<I; i++,gp1++,gp2++) {
        CLASSIFY(i,i_empty,ireal);
        ai[i]+= *gp1;
        bi[i]+= *gp2;
        if( DependencyOfPrevAJ==0 ) {
          if( i_empty )
            p0c+=*gp1;
          else {
            counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
            np0c+=*gp1;
          }
        }
      }
      //  cout << 7 << endl;
      if( Verbose )
        cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
    }
    //cout << 8 << endl;
    cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
    Array<int>vit;
    double viterbi_score=1.0;
    //cout << 9 << endl;
    if( (HMMTrainingSpecialFlags&1) )
      HMMViterbi(*net,gamma,vit);
    else
      viterbi_score=HMMRealViterbi(*net,vit);
    //cout << 10 << endl;
    for(j=1; j<=m; j++) {
      viterbi_alignment[j]=vit[j-1]+1;
      if( viterbi_alignment[j]>l)
        viterbi_alignment[j]=0;
    }
    //cout << 11 << endl;
    sHandler1.setProbOfSentence(sent,cross_entropy);
    //cout << 12 << endl;
    perp.addFactor(cross_entropy, so, l, m,1);
    viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
    if( Verbose )
      cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
    delete net;
    net=0;
    //cout << 13 << endl;
    if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
      printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
    //cout << 14 << endl;
    addAL(viterbi_alignment,sent.getSentenceNo(),l);
    pair_no++;
  } /* of while */
  sHandler1.rewind();
  perp.record("HMM");
  viterbi_perp.record("HMM");
  errorReportAL(cout,"HMM");
  return diff;
}

#endif
Mutex mu; // file-scope mutex; only referenced from code disabled by #if 0 in this file

#if 0
// DISABLED (compiled out by #if 0): earlier variant of the parallel EM pass.
// Unlike em_loop() it updates aCountTable via getRef() (unsynchronized) and
// updates the alpha/beta init vectors without taking the per-entry locks.
// Kept for reference; see em_loop() above for the active implementation.
void hmm::em_loop_2(Perplexity& perp, sentenceHandler& sHandler1,
                    bool dump_alignment, const char* alignfile, Perplexity& viterbi_perp,
                    bool test,bool doInit,int part
                   )
{
  WordIndex i, j, l, m ;
  double cross_entropy;
  int pair_no=0 ;
  perp.clear();
  viterbi_perp.clear();
  ofstream of2;
  // for each sentence pair in the corpus
  if (dump_alignment||FEWDUMPS )
    of2.open(alignfile);
  sentPair sent ;
  //sHandler1.rewind();
  int nnn = 0;
  while(sHandler1.getNextSentence(sent)) {
    //nnn ++;
    //cout << nnn << endl;
    //cout << 1 << endl;
    const Vector<WordIndex>& es = sent.get_eSent();
    const Vector<WordIndex>& fs = sent.get_fSent();
    const float so  = sent.getCount();
    l = es.size() - 1;
    m = fs.size() - 1;
    cross_entropy = log(1.0);
    Vector<WordIndex> viterbi_alignment(fs.size());

    unsigned int I=2*l,J=m;
    bool DependencyOfJ=(CompareAlDeps&(16|8))||(PredictionInAlignments==2);
    bool DependencyOfPrevAJ=(CompareAlDeps&(2|4))||(PredictionInAlignments==0);

    HMMNetwork *net=makeHMMNetwork(es,fs,doInit);
    Array<double> gamma;
    Array<Array2<double> > epsilon(DependencyOfJ?(m-1):1);
    double trainProb;
    trainProb=ForwardBackwardTraining(*net,gamma,epsilon);
    if( !test ) {
      double *gp=conv<double>(gamma.begin());
      for(unsigned int i2=0; i2<J; i2++)for(unsigned int i1=0; i1<I; ++i1,++gp) {
          if( *gp>MINCOUNTINCREASE ) {
            COUNT add= *gp*so;
            if( i1>=l ) {
              //diff->incCount(es[0],fs[1+i2],add);
              tTable.incCount(es[0],fs[1+i2],add);
              aCountTable.getRef(0,i2+1,l,m)+=add;
            } else	{
              //diff->incCount(es[1+i1],fs[1+i2],add);
              tTable.incCount(es[1+i1],fs[1+i2],add);
              aCountTable.getRef(1+i1,1+i2,l,m)+=add;
            }
          }
        }
      double p0c=0.0,np0c=0.0;
      for(unsigned int jj=0; jj<epsilon.size(); jj++) {
        int frenchClass=fwordclasses.getClass(fs[1+min(int(m)-1,int(jj)+1)]);
        double *ep=epsilon[jj].begin();
        if( ep ) {
          double mult=1.0;
          mult*=l;
          //if( DependencyOfJ && J-1)
          //  mult/=(J-1);
          for(i=0; i<I; i++) {
            for(unsigned int i_bef=0; i_bef<I; i_bef++,ep++) {
              CLASSIFY(i,i_empty,ireal);
              CLASSIFY2(i_bef,i_befreal);
              if( i_empty ) {
                p0c+=*ep * mult;
              } else {
                //mu.lock();
                //cout<<"\rP "<<part<<" ";
                //cout<<epsilon.size()<<" "<<jj<<" ";
                //cout<<epsilon[jj].h1<<" " << epsilon[jj].h2<<" ";
                //cout<<i<<" "<<i_bef<<" ";
                //cout<<I<<" "<<J<<"             ";

                cout.flush();
                counts.addAlCount(i_befreal,ireal,l,m,ewordclasses.getClass(es[1+i_befreal]),
                                  frenchClass ,jj+1,*ep * mult,0.0);
                np0c+=*ep * mult;
                //mu.unlock();
              }
              massert( &epsilon[jj](i,i_bef)== ep);
            }
          }
        }
      }
      double *gp1=conv<double>(gamma.begin()),*gp2=conv<double>(gamma.end())-I;
      Array<double>&ai=counts.doGetAlphaInit(I);/*If it is not get yet, init it, all operation envolved is add*/
      Array<double>&bi=counts.doGetBetaInit(I);
      int firstFrenchClass=(fs.size()>1)?(fwordclasses.getClass(fs[1+0])):0;
      for(i=0; i<I; i++,gp1++,gp2++) {
        CLASSIFY(i,i_empty,ireal);
        ai[i]+= *gp1;
        bi[i]+= *gp2;
        if( DependencyOfPrevAJ==0 ) {
          if( i_empty )
            p0c+=*gp1;
          else {
            counts.addAlCount(-1,ireal,l,m,0,firstFrenchClass,0,*gp1,0.0);
            np0c+=*gp1;
          }
        }
      }
      //  cout << 7 << endl;
      if( Verbose )
        cout << "l: " << l << "m: " << m << " p0c: " << p0c << " np0c: " << np0c << endl;
    }
    //cout << 8 << endl;
    cross_entropy+=log(max(trainProb,1e-100))+log(max(net->finalMultiply,1e-100));
    Array<int>vit;
    double viterbi_score=1.0;
    //cout << 9 << endl;
    if( (HMMTrainingSpecialFlags&1) )
      HMMViterbi(*net,gamma,vit);
    else
      viterbi_score=HMMRealViterbi(*net,vit);
    //cout << 10 << endl;
    for(j=1; j<=m; j++) {
      viterbi_alignment[j]=vit[j-1]+1;
      if( viterbi_alignment[j]>l)
        viterbi_alignment[j]=0;
    }
    //cout << 11 << endl;
    sHandler1.setProbOfSentence(sent,cross_entropy);
    //cout << 12 << endl;
    perp.addFactor(cross_entropy, so, l, m,1);
    viterbi_perp.addFactor(log(viterbi_score)+log(max(net->finalMultiply,1e-100)), so, l, m,1);
    if( Verbose )
      cout << "Viterbi-perp: " << log(viterbi_score) << ' ' << log(max(net->finalMultiply,1e-100)) << ' ' << viterbi_score << ' ' << net->finalMultiply << ' ' << *net << "gamma: " << gamma << endl;
    delete net;
    net=0;
    //cout << 13 << endl;
    if (dump_alignment||(FEWDUMPS&&sent.getSentenceNo()<1000) )
      printAlignToFile(es, fs, Elist.getVocabList(), Flist.getVocabList(), of2, viterbi_alignment, sent.getSentenceNo(), viterbi_score);
    //cout << 14 << endl;
    addAL(viterbi_alignment,sent.getSentenceNo(),l);
    pair_no++;
  } /* of while */


  return ;
}


// DISABLED (inside the #if 0 region opened above): runs exactly one EM
// iteration via em_loop_1 and returns the CTTableDiff with the lexical
// count deltas.  Caller owns the returned diff.  Kept for reference.
CTTableDiff<COUNT,PROB>* hmm::em_one_step(int it)
{
  double minErrors=1.0;
  int minIter=0;
  string modelName="Hmm",shortModelName="hmm";
  int dumpFreq=ModelH_Dump_Freq;
  time_t it_st, st, it_fn, fn;
  string tfile, afile,afileh, alignfile, test_alignfile;
  int pair_no = 0;
  bool dump_files = false ;
  ofstream of2 ;
  st = time(NULL) ;
  sHandler1.rewind();
  cout << "\n==========================================================\n";
  cout << modelName << " Training Started at: " << my_ctime(&st);
  pair_no = 0;

  cout << endl << "-----------\n" << modelName << ": Iteration " << it << '\n';
  dump_files = true ;//(dumpFreq != 0) && ((it % dumpFreq) == 0) && !NODUMPS;
  const string number = represent_number(it);
  tfile = Prefix + ".t" + shortModelName + "." + number ;
  afile = Prefix + ".a" + shortModelName + "." + number ;
  afileh = Prefix + ".h" + shortModelName + "." + number ;
  alignfile = Prefix + ".AH" ;
  test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
  counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
  aCountTable.clear();
  initAL();
  CTTableDiff<COUNT,PROB>* diff =em_loop_1(perp, sHandler1,  dump_files , alignfile.c_str(), trainViterbiPerp, false,it==1,it);

  if( errorsAL()<minErrors ) {
    minErrors=errorsAL();
    minIter=it;
  }
  //        if (testPerp && testHandler)
//            em_loop(*testPerp, *testHandler, dump_files, test_alignfile.c_str(), *testViterbiPerp,  true,it==1,it);
//        if (dump_files&&OutputInAachenFormat==1)
//            tTable.printCountTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),1);
//        tTable.normalizeTable(Elist, Flist);
//        aCountTable.normalize(aTable);
//        probs=counts;
//        cout << modelName << ": ("<<it<<") TRAIN CROSS-ENTROPY " << perp.cross_entropy()
//            << " PERPLEXITY " << perp.perplexity() << '\n';
//        if (testPerp && testHandler)
//            cout << modelName << ": ("<<it<<") TEST CROSS-ENTROPY " << (*testPerp).cross_entropy()
//            << " PERPLEXITY " << (*testPerp).perplexity()
//            << '\n';
//        cout << modelName << ": ("<<it<<") VITERBI TRAIN CROSS-ENTROPY " << trainViterbiPerp.cross_entropy()
//            << " PERPLEXITY " << trainViterbiPerp.perplexity() << '\n';
//        if (testPerp && testHandler)
//            cout << modelName << ": ("<<it<<") VITERBI TEST CROSS-ENTROPY " << testViterbiPerp->cross_entropy()
//            << " PERPLEXITY " << testViterbiPerp->perplexity()
//            << '\n';
//        if (dump_files){
//            if( OutputInAachenFormat==0)
///                tTable.printProbTable(tfile.c_str(),Elist.getVocabList(),Flist.getVocabList(),OutputInAachenFormat);
//           ofstream afilestream(afileh.c_str());
//           probs.writeJumps(afilestream);
//           aCountTable.printTable(afile.c_str());

  fn = time(NULL) ;
  cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
  //cout << "tTable contains " << tTable.getHash().bucket_count()
  //     << " buckets and  " << tTable.getHash().size() << " entries." ;
  cout << "==========================================================\n";
  return diff;
}


/// Run one EM iteration over a single shard ("part") of the corpus.
///
/// This is the per-thread workhorse used by multi_thread_em: each worker
/// calls it on its own hmm clone so counts accumulate into thread-private
/// tables, which the main thread merges afterwards.
///
/// @param it    1-based iteration number (it==1 tells em_loop_2 this is the
///              first pass).
/// @param part  shard index; selects the slice of sentence pairs to process
///              and tags the per-shard alignment dump file.
void hmm::em_one_step_2(int it,int part)
{
  double minErrors=1.0;
  int minIter=0;
  // Dumping is currently forced on; the original frequency test was
  // (ModelH_Dump_Freq != 0) && ((it % ModelH_Dump_Freq) == 0) && !NODUMPS.
  const bool dump_files = true;
  // Per-iteration, per-shard alignment dump file name.
  string alignfile = Prefix + ".Ahmm." + represent_number(it, 3) ;
  alignfile += ".part" + represent_number(part, 3);

  // Reset count tables before accumulating this shard's counts.
  counts=HMMTables<int,WordClasses>(GLOBALProbabilityForEmpty,ewordclasses,fwordclasses);
  aCountTable.clear();
  initAL();
  em_loop_2(perp, sHandler1, dump_files, alignfile.c_str(), trainViterbiPerp,
            false, it==1, part);

  // Track the iteration with the lowest alignment-error rate.  The result
  // is local-only; kept for parity with the other training loops.
  if( errorsAL()<minErrors ) {
    minErrors=errorsAL();
    minIter=it;
  }
  (void)minIter; // silence unused-variable warnings
  return ;
}

// Per-worker bookkeeping for one EM training thread
// (see em_thread and multi_thread_em below).
struct hmm_align_struct {
  hmm *h;           // worker's private hmm clone; owns its own count tables
  int part;         // corpus shard this worker processes
  int iter;         // current EM iteration number
  int valid;        // pthread_create() return code (0 on success)
  pthread_t thread; // worker thread handle
  int done;         // 0 = running, 1 = finished, 2 = counts merged.
                    // NOTE(review): written by the worker and polled by the
                    // main thread without synchronization — relies on the
                    // platform treating aligned int stores as atomic.
};

void* em_thread(void *arg)
{
  hmm_align_struct * hm = (hmm_align_struct*) arg;
  hm->h->em_one_step_2(hm->iter,hm->part);
  hm->done = 1;
  return hm;
}


int multi_thread_em(int noIter, int noThread, hmm* base)
{
  // First, do one-step EM
  int i;
  int j;
  time_t it_st, st, it_fn, fn;
  fn = time(NULL);
  int dumpFreq=ModelH_Dump_Freq;
  bool dump_files = false ;
  string modelName = "HMM",shortModelName="hmm";
  string tfile, afile,acfile,afileh, number, alignfile, test_alignfile;
  vector<amodel<COUNT> > counts;
  vector<model2 *> m2;
  counts.resize(noThread);
  m2.resize(noThread);
  for(j=1; j<noThread; j++) {
    m2[j] = new model2(*((model1*)base),base->aTable,counts[j]);
  }
  st  = time(NULL);
  cout << "\n==========================================================\n";
  cout << modelName << " Training Started at: " << my_ctime(&st);

  for(i=1; i<=noIter; i++) {
    base->perp.clear();
    base->trainViterbiPerp.clear();
    if (base->testPerp && base->testHandler) {
      base->testHandler->rewind();
      base->testPerp->clear();
      base->testViterbiPerp->clear();
    }

    it_st = time(NULL) ;

    cout << endl << "-----------\n" << modelName << ": Iteration " << i << '\n';
    dump_files = (dumpFreq != 0) && ((i % dumpFreq) == 0) && !NODUMPS;
    dump_files = true;
    const string number = represent_number(i);
    tfile = Prefix + ".t" + shortModelName + "." + number ;
    afile = Prefix + ".a" + shortModelName + "." + number ;
    acfile = Prefix + ".ac" + shortModelName + "." + number ;
    afileh = Prefix + ".h" + shortModelName + "." + number ;

    alignfile = Prefix + ".A" + shortModelName + "." + number ;
    test_alignfile = Prefix + ".tst.A" + shortModelName + "." + number ;
    base->initAL();
    // except the current thread
    vector<hmm_align_struct> args;
    base->sHandler1.rewind();
    args.resize(noThread);
    for(j=1; j<noThread; j++) {
      args[j].iter = i;
      args[j].part = j;
      args[j].done = 0;
      counts[j].clear();
      args[j].h = new hmm(*m2[j],base->ewordclasses,base->fwordclasses);
      args[j].h->probs = base->probs;
      args[j].valid = pthread_create(&(args[j].thread),NULL,em_thread,&(args[j]));
      if(args[j].valid) {
        cerr << "Error starting thread " << j << endl;
      }
    }
    base->em_one_step_2(i,0);
    //ofstream afilestream(afileh.c_str());
    while(1) {
      bool done = true;
      for (j=1; j<noThread; j++) {
        //pthread_join((args[j].thread),NULL);
        // Start normalization as soon as possible
        if(args[j].done==1) {
          args[j].done = 2;
          base->aCountTable.merge(args[j].h->aCountTable);
          //afilestream << "BEFORE MERGE"<<endl;
          //base->counts.writeJumps(afilestream);
          //afilestream << "MERGING"<<endl;
          //args[j].h->counts.writeJumps(afilestream);
          //afilestream << "MERGED"<<endl;
          base->counts.merge(args[j].h->counts);
          //base->counts.writeJumps(afilestream);
          delete args[j].h;
          args[j].h = 0;
        } else if(args[j].done==2) {
          // Nothing
        } else if(args[j].done==0) {
          done = false;
        }
      }
      if(done) break;
    }
    base->perp.record("HMM");
    base->trainViterbiPerp.record("HMM");
    base->errorReportAL(cout,"HMM");

    // Normalize
//        cout <<" Writing " << afileh  <<"\n";
    base->probs = base->counts;
//        cout <<" Writing " << afileh  <<"\n";
//        ofstream afilestream(afileh.c_str());
//        base->probs.writeJumps(afilestream);
    base->tTable.normalizeTable(base->Elist, base->Flist);
    base->aCountTable.normalize(base->aTable);
    base->aCountTable.clear();
    if (base->testPerp && base->testHandler)
      base->em_loop(*base->testPerp, *base->testHandler, dump_files, test_alignfile.c_str(), *base->testViterbiPerp,  true,i==1,i);
    if (dump_files&&OutputInAachenFormat==1)
      base->tTable.printCountTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),1);
    cout << modelName << ": ("<<i<<") TRAIN CROSS-ENTROPY " << base->perp.cross_entropy()
         << " PERPLEXITY " << base->perp.perplexity() << '\n';
    if (base->testPerp && base->testHandler)
      cout << modelName << ": ("<<i<<") TEST CROSS-ENTROPY " << base->testPerp->cross_entropy()
           << " PERPLEXITY " << base->testPerp->perplexity()
           << '\n';
    cout << modelName << ": ("<<i<<") VITERBI TRAIN CROSS-ENTROPY " << base->trainViterbiPerp.cross_entropy()
         << " PERPLEXITY " << base->trainViterbiPerp.perplexity() << '\n';
    if (base->testPerp && base->testHandler)
      cout << modelName << ": ("<<i<<") VITERBI TEST CROSS-ENTROPY " << base->testViterbiPerp->cross_entropy()
           << " PERPLEXITY " << base->testViterbiPerp->perplexity()
           << '\n';
    dump_files = true;
    if (dump_files) {
      if( OutputInAachenFormat==0)
        base->tTable.printProbTable(tfile.c_str(),base->Elist.getVocabList(),base->Flist.getVocabList(),OutputInAachenFormat);
      ofstream afilestream(afileh.c_str());
      base->counts.writeJumps(afilestream);
      //base->counts.clear();
      base->aCountTable.printTable(acfile.c_str());
      base->aTable.printTable(afile.c_str());
    }
    it_fn = time(NULL) ;

    cout << "\n" << modelName << " Iteration: " << i<< " took: " <<
         difftime(it_fn, it_st) << " seconds\n";

  }
  for(j=1; j<noThread; j++) {
    delete m2[j];
  }
  cout << endl << "Entire " << modelName << " Training took: " << difftime(fn, st) << " seconds\n";
  return 1;
}



#endif
#include "HMMTables.cpp"
template class HMMTables<int,WordClasses>;

